* Iris Arbogast (irisarbogast@gmail.com)
* 8/29/2023
/* This program prepares the ACS data for the regressions. It links children to their parents and creates dummy variables for demographic
characteristics. It drops a few subgroups (e.g. children with NA for family income), merges in the policy data, and labels the variables.
*/

* Load the ACS extract that the replicator pulled from IPUMS
use Data/ACS.dta, clear

* Give the two coverage variables the names used later in this file
rename (hinscaid hcovany) (Medicaid AnyCoverage)


*************************
* Add parents to each kid's observation (if they exist)
*************************

* Each person's row stores a pointer to a parent: poploc for the dad lookup and
* momloc for the mom lookup. In the lookup file, pernum is renamed to that
* pointer so the merge key (year, pointer, serial) lines up with the parent's row.
local ptr_dad poploc
local ptr_mom momloc

foreach who in dad mom {
	local ptr `ptr_`who''

	* Build a lookup with only the parent fields wanted, suffixed _dad / _mom
	preserve
	keep year serial pernum educ speakeng citizen
	rename (pernum educ speakeng citizen) (`ptr' educ_`who' speakeng_`who' citizen_`who')
	tempfile `who'
	save ``who''
	restore

	* Several people may point at the same parent row, hence m:1
	merge m:1 year `ptr' serial using ``who''
	* Lookup rows that nobody points to are not needed
	drop if _merge == 2
	drop _merge
}


*************************
* Create demographic groups for children
*************************
* Constant indicator covering every observation
gen all = 1

* race/ethnicity
* Each dummy is 0/1 when its source variable is observed and missing otherwise.

gen black = (race == 2) if !missing(race)

gen white = (race == 1) if !missing(race)

gen raceother = (race != 1 & race != 2) if !missing(race)

* The !missing() guard is essential here: Stata treats a missing value as
* satisfying "hispan != 0", so an unguarded test would code observations with
* unknown ethnicity as Hispanic instead of leaving them missing.
gen hispanic = (hispan != 0) if !missing(hispan)

gen nothispanic = (hispan == 0) if !missing(hispan)


* income
* Mutually exclusive bands of the poverty variable. Each dummy is missing when
* poverty is missing; poverty == 0 falls in none of the bands.

gen fpl100 = (poverty > 0 & poverty < 100) if !missing(poverty)

gen fpl200 = (poverty >= 100 & poverty < 200) if !missing(poverty)

gen fpl2to400 = (poverty >= 200 & poverty < 400) if !missing(poverty)

gen fpl400 = (poverty >= 400) if !missing(poverty)

* citizenship
* These dummies are never missing: an observation with missing citizen gets
* noncit = 0 and cit = 1, because a missing value is "not equal to 3".

gen noncit = (citizen == 3)

gen cit = (citizen != 3)


* gender
* Likewise never missing: any sex value other than the one tested yields 0.

gen female = (sex == 2)

gen male = (sex == 1)


* buckets
* Categorical versions of the dummies above. The cond() chains test the
* categories in reverse order of the sequential assignments they replace, so
* the same category wins should more than one dummy ever equal 1.

* 0 = white, 1 = black, 2 = other race; missing when none of the dummies is 1
gen race_bucket = cond(raceother == 1, 2, cond(black == 1, 1, cond(white == 1, 0, .)))


* 1 = fpl100, 2 = fpl200, 3 = fpl2to400, 0 = fpl400; missing when none is 1
gen inc_bucket = cond(fpl400 == 1, 0, cond(fpl2to400 == 1, 3, cond(fpl200 == 1, 2, cond(fpl100 == 1, 1, .))))


* age groups

* Age bucket: 1 = under 1, 2 = 1 to 5, 3 = 6 to 12, 4 = 13 and over.
* Missing ages stay missing: inrange() and "<" are false for a missing age,
* and "age < ." excludes missing values from the open-ended top bucket.
gen age_bucket = .
replace age_bucket = 1 if age < 1
replace age_bucket = 2 if inrange(age, 1, 5)
replace age_bucket = 3 if inrange(age, 6, 12)
replace age_bucket = 4 if age >= 13 & age < .


* One 0/1 dummy per bucket (0 when age_bucket is missing)
gen infants = (age_bucket == 1)

gen Ageoneto5 = (age_bucket == 2)

gen Agesixto12 = (age_bucket == 3)

gen Age13up = (age_bucket == 4)

* 18 yrold group
* 1 for ages below 18, 0 otherwise; missing when age is missing
gen no18yr = (age < 18) if !missing(age)



***********
*Variables for children's parents
************

* Every indicator in this section is missing when the underlying value is
* missing for both parents, and 0/1 as soon as at least one parent's value exists.

* Noncitizen (parents): 1 when either parent has citizen == 3
gen noncitp = (citizen_dad == 3 | citizen_mom == 3) if !missing(citizen_dad) | !missing(citizen_mom)

* 1 when neither parent has citizen == 3 (a missing parent does not count against)
gen citp = (citizen_dad != 3 & citizen_mom != 3) if !missing(citizen_dad) | !missing(citizen_mom)


* Weak english (parents): 1 when either parent has speakeng equal to 1 or 6
gen engtbp = (inlist(speakeng_dad, 1, 6) | inlist(speakeng_mom, 1, 6)) if !missing(speakeng_dad) | !missing(speakeng_mom)

* Complement of engtbp: 1 when neither parent has speakeng equal to 1 or 6
gen engnotbp = (!inlist(speakeng_dad, 1, 6) & !inlist(speakeng_mom, 1, 6)) if !missing(speakeng_dad) | !missing(speakeng_mom)

* Highest education code among the parents. max() ignores missing arguments and
* returns missing only when both parents' values are missing.
gen educ_bucket_p = max(educ_dad, educ_mom)


* Generate aggregate education variable
* 1 = code below 6, 2 = code 6, 3 = codes strictly between 6 and 10, 4 = codes 10 or 11.
* A missing educ_bucket_p fails every test below, so educ_ag stays missing.
gen educ_ag = .
replace educ_ag = 1 if educ_bucket_p < 6
replace educ_ag = 2 if educ_bucket_p == 6
replace educ_ag = 3 if educ_bucket_p > 6 & educ_bucket_p < 10
replace educ_ag = 4 if inlist(educ_bucket_p, 10, 11)



* College: 1 when either parent has an education code above 9
gen yr4clgp = ((educ_dad > 9 & educ_dad < .) | (educ_mom > 9 & educ_mom < .)) if !missing(educ_dad) | !missing(educ_mom)


* Complement of yr4clgp (missing stays missing)
gen noyr4clgp = 1 - yr4clgp


* Store the person-level file so it can be merged onto the policy data
save Data/Clean/ACS_temp.dta, replace


*************************
* Merge in policy data
*************************

use Data/Clean/Cleaned_Policy_Data.dta, clear
duplicates drop

* Policy indicators that are summed within each state-year and then dichotomised
local policies str_check freq_check automatic_drop Adminburden public_charge_2017 public_charge_2019 new_redetpause MedicaidExpansion Work_Reqs

* One row per year/state: averages for the continuous series, sums for the indicators
collapse (mean) GSP ur pov cutoffmax Premlevel_201 (sum) `policies', by(year st stusps)

* treat as 1 if in place for more than six months
* (a sum above 6 becomes 1, anything else 0; missing values are left alone)
foreach p of local policies {
	replace `p' = (`p' > 6) if !missing(`p')
}

* generate lags
* Declare the st-by-year panel so the L. operators lag along year within st
tsset st year

* For every series create lags one to six, named L_x, L2_x, ..., L6_x
* (generated series by series, so the variable order matches the names listed)
foreach x in Adminburden new_redetpause MedicaidExpansion cutoffmax Premlevel_201 Work_Reqs pov ur GSP {
	gen L_`x' = L.`x'
	forvalues k = 2/6 {
		gen L`k'_`x' = L`k'.`x'
	}
}


* The merge with the ACS file is keyed on statefip, so rename the state id
rename st statefip

* merge back in with ACS data
* The policy data in memory is the master (statefip-year must be unique there for
* 1:m); ACS_temp.dta contributes many person rows per statefip-year.
* _merge is dropped without filtering on it, so rows that exist on only one
* side are still in the data at this point.
merge 1:m statefip year using Data/Clean/ACS_temp.dta
drop _merge


*************************
* Drop some observations, as explained in the paper
*************************

* drop if family total income is NA
* (must run before the rescaling below, which would change the 9999999 code)
drop if ftotinc == 9999999

* adjust for inflation
replace ftotinc = ftotinc*cpi99

* put family income in terms of thousands to make regression easier to read
replace ftotinc = ftotinc/1000
* drop if family income > 1 million
* Stata orders missing values above every number, so this also removes rows
* whose ftotinc is missing, including any rows that came only from the policy data.
drop if ftotinc > 1000


* drop DC
drop if statefip == 11

* drop if no data for poverty level
drop if poverty == 0



*************************
* Clean up dependent variables
*************************

* 1 when Medicaid == 2, otherwise 0; never missing, even if Medicaid is missing
gen Medicaid_covered = (Medicaid == 2)


* 1 when AnyCoverage == 2, 0 for other observed values, missing if AnyCoverage is missing
gen Any_Covered = (AnyCoverage == 2) if !missing(AnyCoverage)


*************************
* Labels
*************************

* ---- value labels ----

label define inc_bucket_lbl 0 "FPL > 400" 1 "FPL < 100" 2 "FPL 100-200" 3 "FPL 200-400", modify
label values inc_bucket inc_bucket_lbl

* NOTE(review): this label is attached to the raw hispan variable, not to the
* hispanic dummy created earlier, and with "modify" it overwrites the text of
* values 0 and 1 in any existing hispan_lbl. Confirm that this is intended.
label define hispan_lbl 0 "Not Hispanic" 1 "Hispanic", modify
label values hispan hispan_lbl

label define age_bucket_lbl 1 "Infants" 2 "Ages 1 - 5" 3 "Ages 6 - 12" 4 "Ages 13+", add
label values age_bucket age_bucket_lbl

label define race_bucket_lbl 0 "White" 1 "Black", add
label define race_bucket_lbl 2 "Other Race", modify
label values race_bucket race_bucket_lbl

label define educ_ag_lbl 1 "< HS" 2 "HS" 3 "Some College" 4 "College+", add
label values educ_ag educ_ag_lbl


* ---- variable labels ----

label variable educ_ag "Education"
label variable age_bucket "Age"
label variable hispanic "Hispanic"
label variable inc_bucket "Income"
label variable cit "Citizen"
* This label for Adminburden is replaced by a later statement further down
label variable Adminburden "Adminburden"
label variable new_redetpause "Redetermination Pause"
label variable Any_Covered "Any Coverage"
label variable MedicaidExpansion "Medicaid Expansion"
label variable cutoffmax "Maximum Income Cutoff as % FPL"
label variable L_pov "Child Poverty Rate"
label variable L_ur "Unemployment Rate"
label variable L_GSP "Gross State Product Per Capita"
label variable pov "Child Poverty Rate"
label variable ur "Unemployment Rate"
label variable GSP "Gross State Product Per Capita"
label variable Premlevel_201 "Premiums at 201% FPL 1 child"
label variable Work_Reqs "Work Requirements"
label variable Adminburden "Increase in Administrative Burden"
label variable automatic_drop "Automatic Drop"
label variable freq_check "Check Frequency"
label variable str_check "Check Stringency"
label variable noyr4clgp "No Parents with 4 years of College"
label variable yr4clgp "Parent with 4 years of College"
label variable noncitp "Non-Citizen Parent"
label variable citp "Citizen Parents"
label variable engtbp "Weak English Parent"
label variable engnotbp "Without Weak English Parent"
label variable public_charge_2019 "Public Charge Announcement"
label variable public_charge_2017 "Trump Administration"


save Data/Clean/CleanedACSData.dta, replace


*************************
* Export ACS data by year for R 
*************************
* One extract per selected year, restricted to ages below 19. Each pass reloads
* the full cleaned file because keep discards the other years from memory.
foreach yr in 2016 2010 2019 {
	use Data/Clean/CleanedACSData.dta, clear
	keep if year == `yr' & age < 19
	save Data/Clean/CleanedACSData_`yr'.dta, replace
}



